summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/CMakeLists.txt2
-rw-r--r--src/core/hle/kernel/k_hardware_timer.h9
-rw-r--r--src/core/hle/kernel/k_resource_limit.cpp11
-rw-r--r--src/core/hle/kernel/k_resource_limit.h3
-rw-r--r--src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h2
-rw-r--r--src/core/hle/kernel/kernel.cpp2
-rw-r--r--src/core/hle/kernel/svc/svc_address_arbiter.cpp3
-rw-r--r--src/core/hle/kernel/svc/svc_condition_variable.cpp3
-rw-r--r--src/core/hle/kernel/svc/svc_ipc.cpp20
-rw-r--r--src/core/hle/kernel/svc/svc_resource_limit.cpp2
-rw-r--r--src/core/hle/kernel/svc/svc_synchronization.cpp16
-rw-r--r--src/core/hle/kernel/svc/svc_thread.cpp29
-rw-r--r--src/core/hle/service/sockets/sockets.h2
-rw-r--r--src/core/hle/service/sockets/sockets_translate.cpp4
-rw-r--r--src/core/internal_network/network.cpp35
-rw-r--r--src/core/internal_network/network.h2
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt1
-rw-r--r--src/video_core/host_shaders/vulkan_depthstencil_clear.frag12
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp7
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp79
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h19
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp37
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h6
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp8
25 files changed, 260 insertions, 56 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2da983cad..7bb88c8ea 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -134,7 +134,7 @@ else()
endif()
# GCC bugs
- if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "11" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
# These diagnostics would be great if they worked, but are just completely broken
# and produce bogus errors on external libraries like fmt.
add_compile_options(
diff --git a/src/core/hle/kernel/k_hardware_timer.h b/src/core/hle/kernel/k_hardware_timer.h
index 00bef6ea1..27f43cd19 100644
--- a/src/core/hle/kernel/k_hardware_timer.h
+++ b/src/core/hle/kernel/k_hardware_timer.h
@@ -19,13 +19,7 @@ public:
void Initialize();
void Finalize();
- s64 GetCount() const {
- return GetTick();
- }
-
- void RegisterTask(KTimerTask* task, s64 time_from_now) {
- this->RegisterAbsoluteTask(task, GetTick() + time_from_now);
- }
+ s64 GetTick() const;
void RegisterAbsoluteTask(KTimerTask* task, s64 task_time) {
KScopedDisableDispatch dd{m_kernel};
@@ -42,7 +36,6 @@ private:
void EnableInterrupt(s64 wakeup_time);
void DisableInterrupt();
bool GetInterruptEnabled();
- s64 GetTick() const;
void DoTask();
private:
diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp
index fcee26a29..d8a63aaf8 100644
--- a/src/core/hle/kernel/k_resource_limit.cpp
+++ b/src/core/hle/kernel/k_resource_limit.cpp
@@ -5,6 +5,7 @@
#include "common/overflow.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "core/hle/kernel/k_hardware_timer.h"
#include "core/hle/kernel/k_resource_limit.h"
#include "core/hle/kernel/svc_results.h"
@@ -15,9 +16,7 @@ KResourceLimit::KResourceLimit(KernelCore& kernel)
: KAutoObjectWithSlabHeapAndContainer{kernel}, m_lock{m_kernel}, m_cond_var{m_kernel} {}
KResourceLimit::~KResourceLimit() = default;
-void KResourceLimit::Initialize(const Core::Timing::CoreTiming* core_timing) {
- m_core_timing = core_timing;
-}
+void KResourceLimit::Initialize() {}
void KResourceLimit::Finalize() {}
@@ -86,7 +85,7 @@ Result KResourceLimit::SetLimitValue(LimitableResource which, s64 value) {
}
bool KResourceLimit::Reserve(LimitableResource which, s64 value) {
- return Reserve(which, value, m_core_timing->GetGlobalTimeNs().count() + DefaultTimeout);
+ return Reserve(which, value, m_kernel.HardwareTimer().GetTick() + DefaultTimeout);
}
bool KResourceLimit::Reserve(LimitableResource which, s64 value, s64 timeout) {
@@ -117,7 +116,7 @@ bool KResourceLimit::Reserve(LimitableResource which, s64 value, s64 timeout) {
}
if (m_current_hints[index] + value <= m_limit_values[index] &&
- (timeout < 0 || m_core_timing->GetGlobalTimeNs().count() < timeout)) {
+ (timeout < 0 || m_kernel.HardwareTimer().GetTick() < timeout)) {
m_waiter_count++;
m_cond_var.Wait(std::addressof(m_lock), timeout, false);
m_waiter_count--;
@@ -154,7 +153,7 @@ void KResourceLimit::Release(LimitableResource which, s64 value, s64 hint) {
KResourceLimit* CreateResourceLimitForProcess(Core::System& system, s64 physical_memory_size) {
auto* resource_limit = KResourceLimit::Create(system.Kernel());
- resource_limit->Initialize(std::addressof(system.CoreTiming()));
+ resource_limit->Initialize();
// Initialize default resource limit values.
// TODO(bunnei): These values are the system defaults, the limits for service processes are
diff --git a/src/core/hle/kernel/k_resource_limit.h b/src/core/hle/kernel/k_resource_limit.h
index 15e69af56..b733ec8f8 100644
--- a/src/core/hle/kernel/k_resource_limit.h
+++ b/src/core/hle/kernel/k_resource_limit.h
@@ -31,7 +31,7 @@ public:
explicit KResourceLimit(KernelCore& kernel);
~KResourceLimit() override;
- void Initialize(const Core::Timing::CoreTiming* core_timing);
+ void Initialize();
void Finalize() override;
s64 GetLimitValue(LimitableResource which) const;
@@ -57,7 +57,6 @@ private:
mutable KLightLock m_lock;
s32 m_waiter_count{};
KLightConditionVariable m_cond_var;
- const Core::Timing::CoreTiming* m_core_timing{};
};
KResourceLimit* CreateResourceLimitForProcess(Core::System& system, s64 physical_memory_size);
diff --git a/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h
index c485022f5..b62415da7 100644
--- a/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h
+++ b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h
@@ -28,7 +28,7 @@ public:
~KScopedSchedulerLockAndSleep() {
// Register the sleep.
if (m_timeout_tick > 0) {
- m_timer->RegisterTask(m_thread, m_timeout_tick);
+ m_timer->RegisterAbsoluteTask(m_thread, m_timeout_tick);
}
// Unlock the scheduler.
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index ebe7582c6..a1134b7e2 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -231,7 +231,7 @@ struct KernelCore::Impl {
void InitializeSystemResourceLimit(KernelCore& kernel,
const Core::Timing::CoreTiming& core_timing) {
system_resource_limit = KResourceLimit::Create(system.Kernel());
- system_resource_limit->Initialize(&core_timing);
+ system_resource_limit->Initialize();
KResourceLimit::Register(kernel, system_resource_limit);
const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()};
diff --git a/src/core/hle/kernel/svc/svc_address_arbiter.cpp b/src/core/hle/kernel/svc/svc_address_arbiter.cpp
index 04cc5ea64..90ee43521 100644
--- a/src/core/hle/kernel/svc/svc_address_arbiter.cpp
+++ b/src/core/hle/kernel/svc/svc_address_arbiter.cpp
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/core.h"
+#include "core/hle/kernel/k_hardware_timer.h"
#include "core/hle/kernel/k_memory_layout.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/kernel.h"
@@ -52,7 +53,7 @@ Result WaitForAddress(Core::System& system, u64 address, ArbitrationType arb_typ
if (timeout_ns > 0) {
const s64 offset_tick(timeout_ns);
if (offset_tick > 0) {
- timeout = offset_tick + 2;
+ timeout = system.Kernel().HardwareTimer().GetTick() + offset_tick + 2;
if (timeout <= 0) {
timeout = std::numeric_limits<s64>::max();
}
diff --git a/src/core/hle/kernel/svc/svc_condition_variable.cpp b/src/core/hle/kernel/svc/svc_condition_variable.cpp
index ca120d67e..bb678e6c5 100644
--- a/src/core/hle/kernel/svc/svc_condition_variable.cpp
+++ b/src/core/hle/kernel/svc/svc_condition_variable.cpp
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/core.h"
+#include "core/hle/kernel/k_hardware_timer.h"
#include "core/hle/kernel/k_memory_layout.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/kernel.h"
@@ -25,7 +26,7 @@ Result WaitProcessWideKeyAtomic(Core::System& system, u64 address, u64 cv_key, u
if (timeout_ns > 0) {
const s64 offset_tick(timeout_ns);
if (offset_tick > 0) {
- timeout = offset_tick + 2;
+ timeout = system.Kernel().HardwareTimer().GetTick() + offset_tick + 2;
if (timeout <= 0) {
timeout = std::numeric_limits<s64>::max();
}
diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp
index 373ae7c8d..6b5e1cb8d 100644
--- a/src/core/hle/kernel/svc/svc_ipc.cpp
+++ b/src/core/hle/kernel/svc/svc_ipc.cpp
@@ -5,6 +5,7 @@
#include "common/scratch_buffer.h"
#include "core/core.h"
#include "core/hle/kernel/k_client_session.h"
+#include "core/hle/kernel/k_hardware_timer.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/k_server_session.h"
#include "core/hle/kernel/svc.h"
@@ -82,12 +83,29 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad
R_TRY(session->SendReply());
}
+ // Convert the timeout from nanoseconds to ticks.
+ // NOTE: Nintendo does not use this conversion logic in WaitSynchronization...
+ s64 timeout;
+ if (timeout_ns > 0) {
+ const s64 offset_tick(timeout_ns);
+ if (offset_tick > 0) {
+ timeout = kernel.HardwareTimer().GetTick() + offset_tick + 2;
+ if (timeout <= 0) {
+ timeout = std::numeric_limits<s64>::max();
+ }
+ } else {
+ timeout = std::numeric_limits<s64>::max();
+ }
+ } else {
+ timeout = timeout_ns;
+ }
+
// Wait for a message.
while (true) {
// Wait for an object.
s32 index;
Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(),
- num_handles, timeout_ns);
+ num_handles, timeout);
if (result == ResultTimedOut) {
R_RETURN(result);
}
diff --git a/src/core/hle/kernel/svc/svc_resource_limit.cpp b/src/core/hle/kernel/svc/svc_resource_limit.cpp
index 732bc017e..c8e820b6a 100644
--- a/src/core/hle/kernel/svc/svc_resource_limit.cpp
+++ b/src/core/hle/kernel/svc/svc_resource_limit.cpp
@@ -21,7 +21,7 @@ Result CreateResourceLimit(Core::System& system, Handle* out_handle) {
SCOPE_EXIT({ resource_limit->Close(); });
// Initialize the resource limit.
- resource_limit->Initialize(std::addressof(system.CoreTiming()));
+ resource_limit->Initialize();
// Register the limit.
KResourceLimit::Register(kernel, resource_limit);
diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp
index 366e8ed4a..8ebc1bd1c 100644
--- a/src/core/hle/kernel/svc/svc_synchronization.cpp
+++ b/src/core/hle/kernel/svc/svc_synchronization.cpp
@@ -4,6 +4,7 @@
#include "common/scope_exit.h"
#include "common/scratch_buffer.h"
#include "core/core.h"
+#include "core/hle/kernel/k_hardware_timer.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/k_readable_event.h"
#include "core/hle/kernel/svc.h"
@@ -83,9 +84,20 @@ Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_ha
}
});
+ // Convert the timeout from nanoseconds to ticks.
+ s64 timeout;
+ if (timeout_ns > 0) {
+ u64 ticks = kernel.HardwareTimer().GetTick();
+ ticks += timeout_ns;
+ ticks += 2;
+
+ timeout = ticks;
+ } else {
+ timeout = timeout_ns;
+ }
+
// Wait on the objects.
- Result res =
- KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout_ns);
+ Result res = KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout);
R_SUCCEED_IF(res == ResultSessionClosed);
R_RETURN(res);
diff --git a/src/core/hle/kernel/svc/svc_thread.cpp b/src/core/hle/kernel/svc/svc_thread.cpp
index 92bcea72b..933b82e30 100644
--- a/src/core/hle/kernel/svc/svc_thread.cpp
+++ b/src/core/hle/kernel/svc/svc_thread.cpp
@@ -4,6 +4,7 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "core/hle/kernel/k_hardware_timer.h"
#include "core/hle/kernel/k_process.h"
#include "core/hle/kernel/k_scoped_resource_reservation.h"
#include "core/hle/kernel/k_thread.h"
@@ -42,9 +43,9 @@ Result CreateThread(Core::System& system, Handle* out_handle, u64 entry_point, u
R_UNLESS(process.CheckThreadPriority(priority), ResultInvalidPriority);
// Reserve a new thread from the process resource limit (waiting up to 100ms).
- KScopedResourceReservation thread_reservation(
- std::addressof(process), LimitableResource::ThreadCountMax, 1,
- system.CoreTiming().GetGlobalTimeNs().count() + 100000000);
+ KScopedResourceReservation thread_reservation(std::addressof(process),
+ LimitableResource::ThreadCountMax, 1,
+ kernel.HardwareTimer().GetTick() + 100000000);
R_UNLESS(thread_reservation.Succeeded(), ResultLimitReached);
// Create the thread.
@@ -102,20 +103,31 @@ void ExitThread(Core::System& system) {
}
/// Sleep the current thread
-void SleepThread(Core::System& system, s64 nanoseconds) {
+void SleepThread(Core::System& system, s64 ns) {
auto& kernel = system.Kernel();
- const auto yield_type = static_cast<Svc::YieldType>(nanoseconds);
+ const auto yield_type = static_cast<Svc::YieldType>(ns);
- LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds);
+ LOG_TRACE(Kernel_SVC, "called nanoseconds={}", ns);
// When the input tick is positive, sleep.
- if (nanoseconds > 0) {
+ if (ns > 0) {
// Convert the timeout from nanoseconds to ticks.
// NOTE: Nintendo does not use this conversion logic in WaitSynchronization...
+ s64 timeout;
+
+ const s64 offset_tick(ns);
+ if (offset_tick > 0) {
+ timeout = kernel.HardwareTimer().GetTick() + offset_tick + 2;
+ if (timeout <= 0) {
+ timeout = std::numeric_limits<s64>::max();
+ }
+ } else {
+ timeout = std::numeric_limits<s64>::max();
+ }
// Sleep.
// NOTE: Nintendo does not check the result of this sleep.
- static_cast<void>(GetCurrentThread(kernel).Sleep(nanoseconds));
+ static_cast<void>(GetCurrentThread(kernel).Sleep(timeout));
} else if (yield_type == Svc::YieldType::WithoutCoreMigration) {
KScheduler::YieldWithoutCoreMigration(kernel);
} else if (yield_type == Svc::YieldType::WithCoreMigration) {
@@ -124,7 +136,6 @@ void SleepThread(Core::System& system, s64 nanoseconds) {
KScheduler::YieldToAnyThread(kernel);
} else {
// Nintendo does nothing at all if an otherwise invalid value is passed.
- ASSERT_MSG(false, "Unimplemented sleep yield type '{:016X}'!", nanoseconds);
}
}
diff --git a/src/core/hle/service/sockets/sockets.h b/src/core/hle/service/sockets/sockets.h
index 77426c46e..f86af01a4 100644
--- a/src/core/hle/service/sockets/sockets.h
+++ b/src/core/hle/service/sockets/sockets.h
@@ -18,7 +18,9 @@ enum class Errno : u32 {
AGAIN = 11,
INVAL = 22,
MFILE = 24,
+ PIPE = 32,
MSGSIZE = 90,
+ CONNABORTED = 103,
CONNRESET = 104,
NOTCONN = 107,
TIMEDOUT = 110,
diff --git a/src/core/hle/service/sockets/sockets_translate.cpp b/src/core/hle/service/sockets/sockets_translate.cpp
index c1187209f..aed05250c 100644
--- a/src/core/hle/service/sockets/sockets_translate.cpp
+++ b/src/core/hle/service/sockets/sockets_translate.cpp
@@ -23,10 +23,14 @@ Errno Translate(Network::Errno value) {
return Errno::INVAL;
case Network::Errno::MFILE:
return Errno::MFILE;
+ case Network::Errno::PIPE:
+ return Errno::PIPE;
case Network::Errno::NOTCONN:
return Errno::NOTCONN;
case Network::Errno::TIMEDOUT:
return Errno::TIMEDOUT;
+ case Network::Errno::CONNABORTED:
+ return Errno::CONNABORTED;
case Network::Errno::CONNRESET:
return Errno::CONNRESET;
case Network::Errno::INPROGRESS:
diff --git a/src/core/internal_network/network.cpp b/src/core/internal_network/network.cpp
index bda9fa2e0..5d28300e6 100644
--- a/src/core/internal_network/network.cpp
+++ b/src/core/internal_network/network.cpp
@@ -39,6 +39,11 @@ namespace Network {
namespace {
+enum class CallType { // Tells the error translation which kind of socket call produced the error
+ Send, // Passed by Socket::Send and Socket::SendTo when reporting a failure
+ Other, // Default argument value used by every other caller
+};
+
#ifdef _WIN32
using socklen_t = int;
@@ -96,7 +101,7 @@ bool EnableNonBlock(SOCKET fd, bool enable) {
return ioctlsocket(fd, FIONBIO, &value) != SOCKET_ERROR;
}
-Errno TranslateNativeError(int e) {
+Errno TranslateNativeError(int e, CallType call_type = CallType::Other) {
switch (e) {
case 0:
return Errno::SUCCESS;
@@ -112,6 +117,14 @@ Errno TranslateNativeError(int e) {
return Errno::AGAIN;
case WSAECONNREFUSED:
return Errno::CONNREFUSED;
+ case WSAECONNABORTED:
+ if (call_type == CallType::Send) {
+ // Winsock yields WSAECONNABORTED from `send` in situations where Unix
+ // systems, and actual Switches, yield EPIPE.
+ return Errno::PIPE;
+ } else {
+ return Errno::CONNABORTED;
+ }
case WSAECONNRESET:
return Errno::CONNRESET;
case WSAEHOSTUNREACH:
@@ -198,7 +211,7 @@ bool EnableNonBlock(int fd, bool enable) {
return fcntl(fd, F_SETFL, flags) == 0;
}
-Errno TranslateNativeError(int e) {
+Errno TranslateNativeError(int e, CallType call_type = CallType::Other) {
switch (e) {
case 0:
return Errno::SUCCESS;
@@ -208,6 +221,10 @@ Errno TranslateNativeError(int e) {
return Errno::INVAL;
case EMFILE:
return Errno::MFILE;
+ case EPIPE:
+ return Errno::PIPE;
+ case ECONNABORTED:
+ return Errno::CONNABORTED;
case ENOTCONN:
return Errno::NOTCONN;
case EAGAIN:
@@ -236,13 +253,13 @@ Errno TranslateNativeError(int e) {
#endif
-Errno GetAndLogLastError() {
+Errno GetAndLogLastError(CallType call_type = CallType::Other) {
#ifdef _WIN32
int e = WSAGetLastError();
#else
int e = errno;
#endif
- const Errno err = TranslateNativeError(e);
+ const Errno err = TranslateNativeError(e, call_type);
if (err == Errno::AGAIN || err == Errno::TIMEDOUT || err == Errno::INPROGRESS) {
// These happen during normal operation, so only log them at debug level.
LOG_DEBUG(Network, "Socket operation error: {}", Common::NativeErrorToString(e));
@@ -731,13 +748,17 @@ std::pair<s32, Errno> Socket::Send(std::span<const u8> message, int flags) {
ASSERT(message.size() < static_cast<size_t>(std::numeric_limits<int>::max()));
ASSERT(flags == 0);
+ int native_flags = 0;
+#if YUZU_UNIX
+ native_flags |= MSG_NOSIGNAL; // do not send us SIGPIPE
+#endif
const auto result = send(fd, reinterpret_cast<const char*>(message.data()),
- static_cast<int>(message.size()), 0);
+ static_cast<int>(message.size()), native_flags);
if (result != SOCKET_ERROR) {
return {static_cast<s32>(result), Errno::SUCCESS};
}
- return {-1, GetAndLogLastError()};
+ return {-1, GetAndLogLastError(CallType::Send)};
}
std::pair<s32, Errno> Socket::SendTo(u32 flags, std::span<const u8> message,
@@ -759,7 +780,7 @@ std::pair<s32, Errno> Socket::SendTo(u32 flags, std::span<const u8> message,
return {static_cast<s32>(result), Errno::SUCCESS};
}
- return {-1, GetAndLogLastError()};
+ return {-1, GetAndLogLastError(CallType::Send)};
}
Errno Socket::Close() {
diff --git a/src/core/internal_network/network.h b/src/core/internal_network/network.h
index badcb8369..c7e20ae34 100644
--- a/src/core/internal_network/network.h
+++ b/src/core/internal_network/network.h
@@ -33,10 +33,12 @@ enum class Errno {
BADF,
INVAL,
MFILE,
+ PIPE,
NOTCONN,
AGAIN,
CONNREFUSED,
CONNRESET,
+ CONNABORTED,
HOSTUNREACH,
NETDOWN,
NETUNREACH,
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index e61d9af80..c4d459077 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -50,6 +50,7 @@ set(SHADER_FILES
vulkan_blit_depth_stencil.frag
vulkan_color_clear.frag
vulkan_color_clear.vert
+ vulkan_depthstencil_clear.frag
vulkan_fidelityfx_fsr_easu_fp16.comp
vulkan_fidelityfx_fsr_easu_fp32.comp
vulkan_fidelityfx_fsr_rcas_fp16.comp
diff --git a/src/video_core/host_shaders/vulkan_depthstencil_clear.frag b/src/video_core/host_shaders/vulkan_depthstencil_clear.frag
new file mode 100644
index 000000000..1ac177c7e
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_depthstencil_clear.frag
@@ -0,0 +1,12 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#version 460 core
+
+layout (push_constant) uniform PushConstants { // Push-constant block carrying the depth value to write
+ vec4 clear_depth; // Only the x component is read by main()
+};
+
+void main() { // Writes the pushed value as the fragment's depth; no color output is declared
+ gl_FragDepth = clear_depth.x;
+}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index aadd6967c..1ba31be88 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1335,7 +1335,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
- const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
+ const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
+ : VideoCommon::ObtainBufferOperation::MarkAsWritten;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
@@ -1344,8 +1345,12 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
const std::span copy_span{&copy, 1};
if constexpr (IS_IMAGE_UPLOAD) {
+ texture_cache.PrepareImage(image_id, true, false);
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
+ if (offset % BytesPerBlock(image->info.format)) {
+ return false;
+ }
texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
buffer_operand.address, buffer_size);
}
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index f74ae972e..1032c9d12 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -16,6 +16,7 @@
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
#include "video_core/host_shaders/vulkan_color_clear_frag_spv.h"
#include "video_core/host_shaders/vulkan_color_clear_vert_spv.h"
+#include "video_core/host_shaders/vulkan_depthstencil_clear_frag_spv.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
@@ -428,6 +429,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)),
clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)),
+ clear_stencil_frag(BuildShader(device, VULKAN_DEPTHSTENCIL_CLEAR_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)),
@@ -593,6 +595,28 @@ void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_ma
scheduler.InvalidateState();
}
+void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool depth_clear,
+ f32 clear_depth, u8 stencil_mask, u32 stencil_ref,
+ u32 stencil_compare_mask, const Region2D& dst_region) { // Clears depth/stencil over dst_region by recording a draw with a cached clear pipeline
+ const BlitDepthStencilPipelineKey key{ // Every field below takes part in the pipeline cache lookup
+ .renderpass = dst_framebuffer->RenderPass(),
+ .depth_clear = depth_clear,
+ .stencil_mask = stencil_mask,
+ .stencil_compare_mask = stencil_compare_mask,
+ .stencil_ref = stencil_ref,
+ };
+ const VkPipeline pipeline = FindOrEmplaceClearStencilPipeline(key); // Reuses an existing pipeline when an equal key was seen before
+ const VkPipelineLayout layout = *clear_color_pipeline_layout; // Same layout the clear pipeline itself is created with
+ scheduler.RequestRenderpass(dst_framebuffer);
+ scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) { // Captures by value; the lambda is handed to the scheduler
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ BindBlitState(cmdbuf, dst_region);
+ cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); // Consumed by the clear fragment shader as clear_depth.x
+ cmdbuf.Draw(3, 1, 0, 0);
+ });
+ scheduler.InvalidateState();
+}
+
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
const VkPipelineLayout layout = *one_texture_pipeline_layout;
@@ -820,6 +844,61 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipel
return *clear_color_pipelines.back();
}
+/// Returns the graphics pipeline used to clear depth/stencil for the given key, creating and
+/// caching it on first use. clear_stencil_keys and clear_stencil_pipelines are parallel vectors:
+/// the pipeline for clear_stencil_keys[i] lives at clear_stencil_pipelines[i].
+VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline(
+ const BlitDepthStencilPipelineKey& key) {
+ const auto it = std::ranges::find(clear_stencil_keys, key);
+ if (it != clear_stencil_keys.end()) {
+ return *clear_stencil_pipelines[std::distance(clear_stencil_keys.begin(), it)];
+ }
+ clear_stencil_keys.push_back(key);
+ const std::array stages = MakeStages(*clear_color_vert, *clear_stencil_frag);
+ // Always pass the stencil test and replace the stored value with the reference,
+ // restricted to the bits selected by the write mask.
+ const auto stencil = VkStencilOpState{
+ .failOp = VK_STENCIL_OP_KEEP,
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .depthFailOp = VK_STENCIL_OP_KEEP,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = key.stencil_compare_mask,
+ .writeMask = key.stencil_mask,
+ .reference = key.stencil_ref,
+ };
+ const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ // Vulkan disables depth writes whenever the depth test is disabled, so the test must be
+ // enabled for the depth clear to take effect. With VK_COMPARE_OP_ALWAYS every fragment
+ // passes, which makes the draw behave as an unconditional depth write.
+ .depthTestEnable = key.depth_clear,
+ .depthWriteEnable = key.depth_clear,
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ .depthBoundsTestEnable = VK_FALSE,
+ .stencilTestEnable = VK_TRUE,
+ .front = stencil,
+ .back = stencil,
+ .minDepthBounds = 0.0f,
+ .maxDepthBounds = 0.0f,
+ };
+ clear_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pNext = nullptr,
+ .flags = 0,
+ .stageCount = static_cast<u32>(stages.size()),
+ .pStages = stages.data(),
+ .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .pTessellationState = nullptr,
+ .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pDepthStencilState = &depth_stencil_ci,
+ .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
+ .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ // Shares the color-clear layout; ClearDepthStencil pushes its constant through it.
+ .layout = *clear_color_pipeline_layout,
+ .renderPass = key.renderpass,
+ .subpass = 0,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ .basePipelineIndex = 0,
+ }));
+ return *clear_stencil_pipelines.back();
+}
+
void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass,
bool is_target_depth) {
if (pipeline) {
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 2976a7d91..dcfe217aa 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -27,6 +27,16 @@ struct BlitImagePipelineKey {
Tegra::Engines::Fermi2D::Operation operation;
};
+struct BlitDepthStencilPipelineKey { // Cache key for the depth/stencil clear pipelines
+ constexpr auto operator<=>(const BlitDepthStencilPipelineKey&) const noexcept = default; // Defaulted member-wise comparison; keys are compared when searching clear_stencil_keys
+
+ VkRenderPass renderpass; // Render pass the pipeline is created for
+ bool depth_clear; // Whether the clear should also write depth
+ u8 stencil_mask; // Becomes the stencil write mask
+ u32 stencil_compare_mask; // Becomes the stencil compare mask
+ u32 stencil_ref; // Becomes the stencil reference value
+};
+
class BlitImageHelper {
public:
explicit BlitImageHelper(const Device& device, Scheduler& scheduler,
@@ -64,6 +74,10 @@ public:
void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask,
const std::array<f32, 4>& clear_color, const Region2D& dst_region);
+ void ClearDepthStencil(const Framebuffer* dst_framebuffer, bool depth_clear, f32 clear_depth,
+ u8 stencil_mask, u32 stencil_ref, u32 stencil_compare_mask,
+ const Region2D& dst_region);
+
private:
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
const ImageView& src_image_view);
@@ -76,6 +90,8 @@ private:
[[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key);
[[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key);
+ [[nodiscard]] VkPipeline FindOrEmplaceClearStencilPipeline(
+ const BlitDepthStencilPipelineKey& key);
void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth);
@@ -108,6 +124,7 @@ private:
vk::ShaderModule blit_depth_stencil_frag;
vk::ShaderModule clear_color_vert;
vk::ShaderModule clear_color_frag;
+ vk::ShaderModule clear_stencil_frag;
vk::ShaderModule convert_depth_to_float_frag;
vk::ShaderModule convert_float_to_depth_frag;
vk::ShaderModule convert_abgr8_to_d24s8_frag;
@@ -122,6 +139,8 @@ private:
std::vector<vk::Pipeline> blit_depth_stencil_pipelines;
std::vector<BlitImagePipelineKey> clear_color_keys;
std::vector<vk::Pipeline> clear_color_pipelines;
+ std::vector<BlitDepthStencilPipelineKey> clear_stencil_keys;
+ std::vector<vk::Pipeline> clear_stencil_pipelines;
vk::Pipeline convert_d32_to_r32_pipeline;
vk::Pipeline convert_r32_to_d32_pipeline;
vk::Pipeline convert_d16_to_r16_pipeline;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index a8540339d..35bf80ea3 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -126,7 +126,7 @@ struct FormatTuple {
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
{VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
- {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable | Storage}, // A2R10G10B10_UNORM
+ {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable}, // A2R10G10B10_UNORM
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle)
{VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled)
{VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index aa59889bd..032f694bc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -428,15 +428,27 @@ void RasterizerVulkan::Clear(u32 layer_count) {
if (aspect_flags == 0) {
return;
}
- scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
- clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
- VkClearAttachment attachment;
- attachment.aspectMask = aspect_flags;
- attachment.colorAttachment = 0;
- attachment.clearValue.depthStencil.depth = clear_depth;
- attachment.clearValue.depthStencil.stencil = clear_stencil;
- cmdbuf.ClearAttachments(attachment, clear_rect);
- });
+
+ if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) {
+ Region2D dst_region = {
+ Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
+ Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width),
+ .y = clear_rect.rect.offset.y +
+ static_cast<s32>(clear_rect.rect.extent.height)}};
+ blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth,
+ static_cast<u8>(regs.stencil_front_mask), regs.clear_stencil,
+ regs.stencil_front_func_mask, dst_region);
+ } else {
+ scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
+ clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
+ VkClearAttachment attachment;
+ attachment.aspectMask = aspect_flags;
+ attachment.colorAttachment = 0;
+ attachment.clearValue.depthStencil.depth = clear_depth;
+ attachment.clearValue.depthStencil.stencil = clear_stencil;
+ cmdbuf.ClearAttachments(attachment, clear_rect);
+ });
+ }
}
void RasterizerVulkan::DispatchCompute() {
@@ -830,7 +842,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
- const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing;
+ const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
+ : VideoCommon::ObtainBufferOperation::MarkAsWritten;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
@@ -839,8 +852,12 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
const std::span copy_span{&copy, 1};
if constexpr (IS_IMAGE_UPLOAD) {
+ texture_cache.PrepareImage(image_id, true, false);
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
+ if (offset % BytesPerBlock(image->info.format)) {
+ return false;
+ }
texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
buffer_operand.address, buffer_size);
}
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index e9ec91265..a40825c9f 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -243,6 +243,9 @@ public:
/// Create channel state.
void CreateChannel(Tegra::Control::ChannelState& channel) final override;
+ /// Prepare an image to be used
+ void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
+
std::recursive_mutex mutex;
private:
@@ -387,9 +390,6 @@ private:
/// Synchronize image aliases, copying data if needed
void SynchronizeAliases(ImageId image_id);
- /// Prepare an image to be used
- void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
-
/// Prepare an image view to be used
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index adde96aa5..617417040 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -71,6 +71,11 @@ constexpr std::array R8G8B8_SSCALED{
VK_FORMAT_UNDEFINED,
};
+constexpr std::array VK_FORMAT_R32G32B32_SFLOAT{ // NOTE(review): sibling tables omit the VK_FORMAT_ prefix (e.g. R8G8B8_SSCALED); this name is spelled like the Vulkan enumerator - consider renaming together with its use in GetFormatAlternatives
+ VK_FORMAT_R32G32B32A32_SFLOAT, // Alternative returned for VK_FORMAT_R32G32B32_SFLOAT
+ VK_FORMAT_UNDEFINED, // Final entry, as in the R8G8B8_SSCALED table
+};
+
} // namespace Alternatives
enum class NvidiaArchitecture {
@@ -103,6 +108,8 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
return Alternatives::R16G16B16_SSCALED.data();
case VK_FORMAT_R8G8B8_SSCALED:
return Alternatives::R8G8B8_SSCALED.data();
+ case VK_FORMAT_R32G32B32_SFLOAT:
+ return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data();
default:
return nullptr;
}
@@ -130,6 +137,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica
VK_FORMAT_A2B10G10R10_UINT_PACK32,
VK_FORMAT_A2B10G10R10_UNORM_PACK32,
VK_FORMAT_A2B10G10R10_USCALED_PACK32,
+ VK_FORMAT_A2R10G10B10_UNORM_PACK32,
VK_FORMAT_A8B8G8R8_SINT_PACK32,
VK_FORMAT_A8B8G8R8_SNORM_PACK32,
VK_FORMAT_A8B8G8R8_SRGB_PACK32,